1 Load Packages

# Attach `pkg`, installing it first when it is not available.
#
# pkg:  package name as a single string.
# bioc: TRUE to install through BiocManager::install(), FALSE (default) to
#       install from CRAN with install.packages().
#
# The final library() call stops with an error if the package is still
# missing after the install attempt; require() would only return FALSE and
# let the script run on until some later, unrelated-looking failure.
attach_or_install <- function(pkg, bioc = FALSE) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    if (bioc) {
      BiocManager::install(pkg)
    } else {
      install.packages(pkg)
    }
  }
  library(pkg, character.only = TRUE)
  invisible(pkg)
}

# Packages are attached in the same order as before so that function masking
# between them does not change.
attach_or_install("knitr")
# BiocManager is only ever called as BiocManager::install(), so it has to be
# installed but does not need to be attached.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}
attach_or_install("dplyr")
attach_or_install("stringr")
attach_or_install("Seurat")
attach_or_install("sctransform")
attach_or_install("glmGamPoi", bioc = TRUE)
attach_or_install("patchwork")
attach_or_install("ggplot2")
attach_or_install("EnhancedVolcano", bioc = TRUE)
attach_or_install("DESeq2", bioc = TRUE)
attach_or_install("tidyverse")
attach_or_install("RColorBrewer")
attach_or_install("car")
attach_or_install("openxlsx")
attach_or_install("readxl")
attach_or_install("ggrepel")
attach_or_install("gghighlight")
attach_or_install("ggpmisc")
attach_or_install("data.table")
attach_or_install("here")

# Raise the `future.globals.maxSize` option to 74 GiB (74 * 2^30 bytes).
options(future.globals.maxSize = 74 * 2^30)
# Echo the option to confirm the value that was set (79456894976 bytes).
getOption("future.globals.maxSize")
## [1] 79456894976
#if (!requireNamespace("BiocManager", quietly=TRUE))
#    install.packages("BiocManager")
# To support parallel execution:
#BiocManager::install(c("doMC", "doRNG","doSNOW"))

# For the main example:
#BiocManager::install(c("mixtools", "SummarizedExperiment"))

# For the examples in the follow-up section of the tutorial:
#BiocManager::install(c("DT", "plotly", "NMF", "d3heatmap",
#                       "dynamicTreeCut", "R2HTML", "Rtsne", "zoo"))

#if (!require("BiocManager", quietly = TRUE))
#    install.packages("BiocManager")
# BiocManager::install("AUCell")

library(AUCell)
library(GSEABase)

# Print the project root that `here` resolved; the paths passed to here()
# further down are built relative to it.
here::here()
## [1] "C:/Users/jonat/OneDrive - University of Southern California/Github/jga"

2 Loading Filtered Seurat Object

These files are post-DoubletFinder.

# Read the saved object from <project root>/datasets/KPMP_Stroma.rds.
SO <- here("datasets", "KPMP_Stroma.rds") |>
  readRDS()

# Quick visual check of the loaded object with the default grouping.
DimPlot(object = SO)

# Gene-set collection to score, read from the project root.
geneSets <- readRDS(here("geneSets_All_human.rds"))
# Extract the "data" layer of the object's default assay (genes x cells).
# `layer` is the SeuratObject >= 5.0 name for this argument; the older
# `slot` spelling is deprecated there and emits a warning.
exprMatrix <- GetAssayData(SO, layer = "data")
# Preview the top-left corner to confirm gene symbols are the row names and
# cell barcodes are the column names.
exprMatrix[1:5, 1:5]
## 5 x 5 sparse Matrix of class "dgCMatrix"
##          KB1_AAACCCAAGGTTATAG KB1_AAACCCATCAGGTGTT KB1_AAACCCATCTTAAGGC
## A1BG                 .                    .                   .        
## A1BG-AS1             .                    1.292535            0.9851203
## A1CF                 .                    .                   .        
## A2M                  1.684869             1.292535            1.7974767
## A2M-AS1              .                    .                   .        
##          KB1_AAACGCTGTGTCCACG KB1_AAATGGAGTCATTGCA
## A1BG                        .                    .
## A1BG-AS1                    .                    .
## A1CF                        .                    .
## A2M                         .                    .
## A2M-AS1                     .                    .
# Build the per-cell gene rankings once, with the genes-detected summary plot
# and quantiles enabled. Previously the rankings were built twice and the AUC
# computed twice here (AUCell_run, then buildRankings + calcAUC), and every
# one of those results was overwritten before being used; the AUC is computed
# from these rankings further below.
# NOTE(review): AUCell orders tied genes randomly, so the seed is fixed to
# make the rankings reproducible between runs -- confirm against AUCell docs.
set.seed(333)
cells_rankings <- AUCell_buildRankings(exprMatrix, plotStats=TRUE)

##     min      1%      5%     10%     50%    100% 
##  401.00  426.00  493.95  573.00 1397.00 7368.00
# Print the ranking object: its dimensions, the top-left corner of the
# ranking, and the quantiles of the number of genes detected per cell.
cells_rankings
## Ranking for 29732 genes (rows) and 18740 cells (columns).
## 
## Top-left corner of the ranking:
##           cells
## genes      KB1_AAACCCAAGGTTATAG KB1_AAACCCATCAGGTGTT KB1_AAACCCATCTTAAGGC
##   A1BG                    26654                14059                22592
##   A1BG-AS1                 5952                 1334                 2282
##   A1CF                    25856                16477                17713
##   A2M                      1255                 2114                  474
##   A2M-AS1                 17460                23082                 5417
##   A2ML1                   19914                25598                22810
##           cells
## genes      KB1_AAACGCTGTGTCCACG KB1_AAATGGAGTCATTGCA
##   A1BG                    26186                 9748
##   A1BG-AS1                13354                 7487
##   A1CF                    23758                22995
##   A2M                     16507                13809
##   A2M-AS1                 12037                 9582
##   A2ML1                   16993                13977
## 
## Quantiles for the number of genes detected by cell:
##     min      1%      5%     10%     50%    100% 
##  401.00  426.00  493.95  573.00 1397.00 7368.00
# Compute the AUC of each gene set in each cell from the rankings.
cells_AUC <- AUCell_calcAUC(geneSets, cells_rankings)

# Fixed seed before threshold exploration.
# NOTE(review): presumably the threshold fitting has a random component --
# confirm against the AUCell documentation.
set.seed(333)
# Lay the per-gene-set histograms out on a 3 x 3 grid. par() returns the
# previous settings, which are restored afterwards so later base-graphics
# plots are not forced into the same grid.
old_par <- par(mfrow = c(3, 3))
cells_assignment <- AUCell_exploreThresholds(cells_AUC, plotHist=TRUE, assign=TRUE)
par(old_par)

## Create Common Metadata Columns

# For every gene set, keep only the vector of cells assigned to it.
cellsAssigned <- lapply(
  cells_assignment,
  \(gene_set_result) gene_set_result$assignment
)
# Flatten the named list into a long table: one row per (cell, gene set)
# pair, with the list names in the second column.
assignmentTable <- cellsAssigned |>
  reshape2::melt(value.name = "cell")
names(assignmentTable)[2] <- "geneSet"
head(assignmentTable)
# move cell to rownames

# TRUE when at least one cell appears in more than one row of the table.
anyDuplicated(assignmentTable$cell) > 0L
## [1] TRUE
# assignmentTable$cell[duplicated(assignmentTable$cell)]

# Keep a single row per cell -- the first one in table order -- so that cell
# IDs are unique; any further gene-set assignments of that cell are dropped.
# The row names left over from subsetting are cleared first, because
# column_to_rownames() refuses a data frame that already has row names, and
# then the "cell" column becomes the row names.
assignmentTable <- assignmentTable[
  match(unique(assignmentTable$cell), assignmentTable$cell),
] %>%
  remove_rownames() %>%
  column_to_rownames(var = "cell")



# Add the gene-set assignment as a metadata column; the table carries the
# cell IDs as its row names.
SO <- AddMetaData(object = SO, metadata = assignmentTable)

# Print the updated metadata, then re-draw the default plot.
SO@meta.data
DimPlot(object = SO)

# Same embedding, coloured by the assigned gene set.
DimPlot(object = SO, group.by = "geneSet")

# Cross-tabulate the existing subclass.l2 annotation against the gene-set
# assignment. table() leaves out NA values by default, so cells without an
# assignment are not counted here.
table(SO@meta.data[["subclass.l2"]], SO@meta.data[["geneSet"]])
##          
##           h_afferent h_efferent h_fiba h_fibb h_fibc h_fibd h_pericyte h_renin
##   aFIB             0          1      1      6    128      0          0       0
##   cycMYOF          0          0      0      1      0      0          0       0
##   dFIB             0          0      0      0      0      0          0       0
##   dM-FIB           0          0      0      0      0      0          0       0
##   dVSMC            1          0      0      0      0      0          0       0
##   FIB              0          0    148     47     58      0          0       0
##   M-FIB            1          0    112    539      0    389          0       0
##   MC               0          2      0      0      0      0          0       5
##   MYOF           122         43      8     29     38      0          1       1
##   REN              2          6      0      0      0      0          1       5
##   VSMC           110        366      0      0      0      0         74       5
##   VSMC/P           8         90      2      2      0      4        103       4

3 Session Info

# Record the R version, platform and package versions used for this run.
utils::sessionInfo()
## R version 4.4.3 (2025-02-28 ucrt)
## Platform: x86_64-w64-mingw32/x64
## Running under: Windows 11 x64 (build 22631)
## 
## Matrix products: default
## 
## 
## locale:
## [1] LC_COLLATE=English_United States.utf8 
## [2] LC_CTYPE=English_United States.utf8   
## [3] LC_MONETARY=English_United States.utf8
## [4] LC_NUMERIC=C                          
## [5] LC_TIME=English_United States.utf8    
## 
## time zone: America/Los_Angeles
## tzcode source: internal
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] GSEABase_1.68.0             graph_1.84.1               
##  [3] annotate_1.84.0             XML_3.99-0.18              
##  [5] AnnotationDbi_1.68.0        AUCell_1.28.0              
##  [7] here_1.0.1                  data.table_1.17.0          
##  [9] ggpmisc_0.6.1               ggpp_0.5.8-1               
## [11] gghighlight_0.4.1           readxl_1.4.5               
## [13] openxlsx_4.2.8              car_3.1-3                  
## [15] carData_3.0-5               RColorBrewer_1.1-3         
## [17] lubridate_1.9.4             forcats_1.0.0              
## [19] purrr_1.0.4                 readr_2.1.5                
## [21] tidyr_1.3.1                 tibble_3.2.1               
## [23] tidyverse_2.0.0             DESeq2_1.46.0              
## [25] SummarizedExperiment_1.36.0 Biobase_2.66.0             
## [27] MatrixGenerics_1.18.1       matrixStats_1.5.0          
## [29] GenomicRanges_1.58.0        GenomeInfoDb_1.42.3        
## [31] IRanges_2.40.1              S4Vectors_0.44.0           
## [33] BiocGenerics_0.52.0         EnhancedVolcano_1.24.0     
## [35] ggrepel_0.9.6               ggplot2_3.5.1              
## [37] patchwork_1.3.0             glmGamPoi_1.18.0           
## [39] sctransform_0.4.1           SeuratObject_5.0.2         
## [41] Seurat_4.4.0                stringr_1.5.1              
## [43] dplyr_1.1.4                 knitr_1.50                 
## 
## loaded via a namespace (and not attached):
##   [1] RcppAnnoy_0.0.22          splines_4.4.3            
##   [3] later_1.4.1               R.oo_1.27.0              
##   [5] cellranger_1.1.0          polyclip_1.10-7          
##   [7] lifecycle_1.0.4           rprojroot_2.0.4          
##   [9] globals_0.16.3            lattice_0.22-6           
##  [11] MASS_7.3-64               magrittr_2.0.3           
##  [13] plotly_4.10.4             sass_0.4.9               
##  [15] rmarkdown_2.29            jquerylib_0.1.4          
##  [17] yaml_2.3.10               httpuv_1.6.15            
##  [19] spam_2.11-1               zip_2.3.2                
##  [21] sp_2.2-0                  spatstat.sparse_3.1-0    
##  [23] reticulate_1.41.0.1       DBI_1.2.3                
##  [25] cowplot_1.1.3             pbapply_1.7-2            
##  [27] abind_1.4-8               zlibbioc_1.52.0          
##  [29] Rtsne_0.17                mixtools_2.0.0.1         
##  [31] R.utils_2.13.0            GenomeInfoDbData_1.2.13  
##  [33] irlba_2.3.5.1             listenv_0.9.1            
##  [35] spatstat.utils_3.1-3      MatrixModels_0.5-3       
##  [37] goftest_1.2-3             spatstat.random_3.3-2    
##  [39] fitdistrplus_1.2-2        parallelly_1.42.0        
##  [41] DelayedMatrixStats_1.28.1 leiden_0.4.3.1           
##  [43] codetools_0.2-20          DelayedArray_0.32.0      
##  [45] tidyselect_1.2.1          UCSC.utils_1.2.0         
##  [47] farver_2.1.2              spatstat.explore_3.3-4   
##  [49] jsonlite_1.9.1            progressr_0.15.1         
##  [51] Formula_1.2-5             ggridges_0.5.6           
##  [53] survival_3.8-3            segmented_2.1-4          
##  [55] tools_4.4.3               ica_1.0-3                
##  [57] Rcpp_1.0.14               glue_1.8.0               
##  [59] gridExtra_2.3             SparseArray_1.6.2        
##  [61] xfun_0.51                 withr_3.0.2              
##  [63] BiocManager_1.30.25       fastmap_1.2.0            
##  [65] SparseM_1.84-2            digest_0.6.37            
##  [67] timechange_0.3.0          R6_2.6.1                 
##  [69] mime_0.12                 colorspace_2.1-1         
##  [71] scattermore_1.2           tensor_1.5               
##  [73] RSQLite_2.3.9             spatstat.data_3.1-6      
##  [75] R.methodsS3_1.8.2         generics_0.1.3           
##  [77] httr_1.4.7                htmlwidgets_1.6.4        
##  [79] S4Arrays_1.6.0            uwot_0.2.3               
##  [81] pkgconfig_2.0.3           gtable_0.3.6             
##  [83] blob_1.2.4                lmtest_0.9-40            
##  [85] XVector_0.46.0            htmltools_0.5.8.1        
##  [87] dotCall64_1.2             scales_1.3.0             
##  [89] png_0.1-8                 spatstat.univar_3.1-2    
##  [91] rstudioapi_0.17.1         tzdb_0.5.0               
##  [93] reshape2_1.4.4            nlme_3.1-167             
##  [95] cachem_1.1.0              zoo_1.8-14               
##  [97] KernSmooth_2.23-26        parallel_4.4.3           
##  [99] miniUI_0.1.1.1            pillar_1.10.1            
## [101] grid_4.4.3                vctrs_0.6.5              
## [103] RANN_2.6.2                promises_1.3.2           
## [105] xtable_1.8-4              cluster_2.1.8            
## [107] evaluate_1.0.3            cli_3.6.4                
## [109] locfit_1.5-9.12           compiler_4.4.3           
## [111] rlang_1.1.5               crayon_1.5.3             
## [113] future.apply_1.11.3       labeling_0.4.3           
## [115] plyr_1.8.9                stringi_1.8.4            
## [117] viridisLite_0.4.2         deldir_2.0-4             
## [119] BiocParallel_1.40.0       Biostrings_2.74.1        
## [121] munsell_0.5.1             lazyeval_0.2.2           
## [123] spatstat.geom_3.3-5       quantreg_6.1             
## [125] Matrix_1.7-2              hms_1.1.3                
## [127] sparseMatrixStats_1.18.0  bit64_4.6.0-1            
## [129] future_1.34.0             KEGGREST_1.46.0          
## [131] shiny_1.10.0              kernlab_0.9-33           
## [133] ROCR_1.0-11               memoise_2.0.1            
## [135] igraph_2.1.4              bslib_0.9.0              
## [137] bit_4.6.0                 polynom_1.4-1